*Control for wealth? Countervailing forces with health type: precautionary savings
* vs. higher costs

*Control for age? Life cycle variation in health spending, unrelated to type
*Control for CURRENT disability? Seems like a risk of overfitting

set scheme s2mono

cd "$revdta"

****FIRST: Low and Pistaferr-type residual types:
use  ".\Full_Panel_2b.dta", clear
keep if sample_analytic==1
drop yrd* age2

gen part = lw != .

g all_sev=all_benefits*sev
g all_mod=all_benefits*mod
g all_welf=all_benefits*(year>=1996)						
g all_sev_welf=all_benefits*(year>=1996)*sev
g all_mod_welf=all_benefits*(year>=1996)*mod

tab year,g(yrd)
g age2=age^2/100	

sort newid year
foreach var of varlist lw sev mod age age2 white married yrd1-yrd21 {
	qby newid: g diff`var'=`var'-`var'[_n-1]
	}
qby newid: g gap=year-year[_n-1]
keep if gap==1 | gap == 2
*overall head selection equation:
dprobit part sev mod age age2 white married all_benefits all_sev all_mod all_welf all_sev_welf all_mod_welf yrd1-yrd21 , vce(cluster newid) 
predict xb0,xb
g lamda0=normalden(xb0)/normal(xb0)

qby newid: g lag1lamda0=lamda0[_n-1]
qby newid: g lag1xb0=xb0[_n-1]

g dyreff=0
g yreff=0
g dyreff_nosel=0
g yreff_nosel=0
g yreff_m1=0
g dyreff_m1=0
g yreff_m0=0
g dyreff_m0=0

*Overall Wage equation for the household head:
xi: reg difflw diffsev diffmod diffage diffage2   diffmarried diffwhite diffyrd* i.gap|lamda0 i.gap|lag1lamda0 ,vce(cluster newid)
*testparm lamda0-_IgapXlag1_21

replace dyreff=. if !e(sample)
replace yreff=. if !e(sample)

forvalues j=1(1)21	{
	replace dyreff=dyreff+_b[diffyrd`j']*diffyrd`j' if e(sample)
					}
gen g1=difflw-_b[diffsev]*diffsev-_b[diffmod]*diffmod-_b[diffage]*diffage-_b[diffage2]*diffage2-_b[diffmarried]*diffmarried-dyreff 	if e(sample)				

replace yreff = yreff+_b[_cons] if e(sample)
forvalues j=1(1)21	{
	replace yreff=yreff+_b[diffyrd`j']*yrd`j' if e(sample)
					}
	gen reslev=lw-_b[diffsev]*sev-_b[diffmod]*mod-_b[diffage]*age-_b[diffage2]*age2-_b[diffmarried]*diffmarried-yreff  if e(sample)

egen fix=mean(reslev),by(newid)
egen intercept = mean(yreff)
 
collapse fix intercept ,by(married newid educ birthy)
su fix  
xtile type_fix=fix ,nq(4)
replace type_fix=2 if type_fix >= 2 & type_fix <=3
replace type_fix=3 if type_fix ==4
replace type_fix=1 if fix==.						/*This assumes that people who are never observed at work comne from the bottom of the f distribution*/
sum fix if type_fix==1
replace fix = r(mean) if fix==.

bysort type_fix: sum fix intercept 

keep newid type_fix married
duplicates drop newid type_fix, force
tempfile types
save `types', replace

*NOTE: these fixed effects are DIFFERENT from the ones in LP's paper.
*This is mostly due to the new years, but also because of changes in
*sample of previous years implied by the new data (e.g., increasing education).



*SECOND: PRODUCTIVE TYPE ASSIGNED BASED ON WAGES:
use  ".\Full_Panel_2b.dta", clear
keep if sample_analytic==1 & educ <= 2
drop yrd* age2

g all_sev=all_benefits*sev
g all_mod=all_benefits*mod
g all_welf=all_benefits*(year>=1996)						
g all_sev_welf=all_benefits*(year>=1996)*sev
g all_mod_welf=all_benefits*(year>=1996)*mod

gen emp = hours >=1500 & hours!=. 
gen hsgrad=educ==2
gen healthy = mod == 0 & sev == 0

gen healthyemp = emp if healthy == 1

tab year,g(yrd)
g age2=age^2/100	

sort newid year
foreach var of varlist lw sev mod age age2 white married yrd1-yrd17 {
	qby newid: g diff`var'=`var'-`var'[_n-1]
	}
*qby newid: g gap=year-year[_n-1]
*keep if gap==1 | gap == 2

foreach var in lw emp hsgrad healthy mod sev married anyDI healthyemp{
reg `var' i.age i.year
predict `var'_res, res
levelsof age, local(ages)
foreach x of local ages{
quietly sum `var'_res if age == `x'
replace `var'_res = `var'_res/`r(sd)' if age==`x'
}
}

collapse (mean) *_res, by(newid)
sort lw_res
cluster kmeans lw_res, k(3) generate(type_prodwage) start(segments)
replace type_prodwage = 1 if type_prodwage == .

cluster kmeans lw_res emp_res healthy_res, k(3) generate(type_covs) start(segments)
replace type_covs = 1 if type_covs == .

cluster kmeans lw_res emp_res hsgrad_res healthy_res, k(3) generate(type_withed) start(segments)
replace type_withed = 1 if type_withed == .

cluster kmeans lw_res emp_res healthy_res married_res, k(3) generate(type_withmar) start(segments)
replace type_withmar = 1 if type_withmar == .

cluster kmeans lw_res emp_res healthy_res married_res anyDI_res, k(3) generate(type_model) start(segments)
replace type_model = 1 if type_model == .
*In case of model, assigning people to low employment/educ single type
*type 1: high disability, mostly single, low LFP/wage, often on DI
*type 2: medium disability, mostly married, low LFP/wage, occasionally on DI
*type 3: low disability, mostly married, high LFP/wage, rarely on DI

cluster kmeans lw_res emp_res hsgrad_res healthy_res married_res anyDI_res, k(3) generate(type_all) start(segments)
replace type_all = 1 if type_all == .
*for this type--assigning missings to similarly low-education, mixed marital status
*type 1: LOW EDUCATION, high dis, mixed married, lower LFP/wage - low type
*type 2: HIGH EDUCATION, high dis, mostly single, lower LFP/wage - bad early draw
*type 3: HIGH EDUCATION, low dis, mostly married, high LFP/wage - good early draw


cluster kmeans lw_res healthyemp_res, k(3) generate(type_stationary) start(segments)
replace type_stationary = 1 if type_stationary == .

foreach x in 0 0.5 1 1.5 2 5 10{
local y = `x'*10
gen healthyweight = healthyemp_res*`x'
cluster kmeans lw_res healthyweight, k(3) generate(type_stationary`y') start(segments)
replace type_stationary`y' = 1 if type_stationary`y' == .
drop healthyweight
}

tab type_covs type_prodwage
tab type_withed type_prodwage
tab type_withmar type_prodwage
tab type_covs type_withed 

keep newid type_* *_res
tempfile prodtype_wage
save `prodtype_wage', replace

*THIRD: PRODUCTIVE TYPE ASSIGNED BASED ON EARNINGS FROM FULL-TIME WORK:
use  ".\Full_Panel_2b.dta", clear
keep if sample_analytic==1
drop yrd* age2

gen part = lw != .

gen logly = log(ly) if hours >=1500 & hours !=.
reg lw i.age i.year i.DS
predict lw_res, res
collapse (mean) lw_res, by(newid)
sort lw
cluster kmeans lw_res, k(3) generate(type_prodly) start(segments)
keep newid type_prodly
tempfile prodtype_ly
save `prodtype_ly', replace



***FOURTH: HEALTH TYPE ASSIGNED BASED ON MEDICAL SPENDING:
*problem: uninformative about disabilities (see bottom plots)
use  ".\Full_Panel_2b.dta", clear
keep if sample_analytic==1
drop yrd* age2


*levelsof age, local(ages)
*foreach a of local ages{
* xtile wealthbin = w_with_h if age==`a', nquantiles(5)
* }
sum health, det
replace health = r(p95) if health > r(p95) & health !=.
 reg health i.age i.educ i.DS i.sp_DS
 predict health_res , res
 
 collapse (mean) health_res , by(newid) 
 sort health_res
 cluster kmeans health_res, k(2) generate(type_healthspend) start(segments)
 keep newid type_healthspend
tempfile healthtype_spend
 save `healthtype_spend', replace
 
 
 ***FIFTH: HEALTH TYPE ASSIGNED BASED ON HEALTH CONDITIONS:
 *problem: types observed at different ages
use  ".\Full_Panel_2b.dta", clear
keep if sample_analytic==1
drop yrd* age2
 
 
gen healthy = DS==0 & sp_DS==0

reg healthy i.age
predict healthy_res, res
collapse (mean) healthy_res, by(newid)
sort healthy_res
 cluster kmeans healthy_res, k(2) generate(type_healthdis) start(segments)

 keep newid type_healthdis
 tempfile healthtype_dis
 save `healthtype_dis', replace
 
 ****SIXTH: TYPE BASED ON WORK AND HEALTH, BROADER SAMPLE:
 use  ".\Full_Panel_2b.dta", clear
keep if sample_health==1
drop yrd* age2

g all_sev=all_benefits*sev
g all_mod=all_benefits*mod
g all_welf=all_benefits*(year>=1996)						
g all_sev_welf=all_benefits*(year>=1996)*sev
g all_mod_welf=all_benefits*(year>=1996)*mod

gen emp = hours >=1500 & hours!=. 
gen hsgrad=educ==2
gen healthy = mod == 0 & sev == 0

gen healthyemp = emp if healthy == 1

tab year,g(yrd)
g age2=age^2/100	

sort newid year
foreach var of varlist lw sev mod age age2 white married yrd1-yrd17 {
	qby newid: g diff`var'=`var'-`var'[_n-1]
	}
*qby newid: g gap=year-year[_n-1]
*keep if gap==1 | gap == 2

foreach var in lw emp hsgrad healthy mod sev married anyDI healthyemp{
reg `var' i.age i.year
predict `var'_res, res
levelsof age, local(ages)
foreach x of local ages{
quietly sum `var'_res if age == `x'
replace `var'_res = `var'_res/`r(sd)' if age==`x'
}
}

collapse (mean) *_res, by(newid)
sort lw_res

cluster kmeans lw_res healthyemp_res, k(3) generate(type_oldsample) start(segments)
replace type_oldsample = 1 if type_oldsample== .

keep newid type_* *_res
tempfile prodtype_oldsample
save `prodtype_oldsample', replace


 
use  ".\Full_Panel_2b.dta", clear
keep if sample_analytic==1
drop yrd* age2

gen emp = hours >=1500 & hours!=. 


merge m:1 newid using `types'
drop if _merge== 2
drop _merge

merge m:1 newid using `healthtype_spend'
drop _merge

merge m:1 newid using `healthtype_dis'
drop _merge

merge m:1 newid using `prodtype_wage'
drop _merge

merge m:1 newid using `prodtype_ly'
drop _merge

merge m:1 newid using `prodtype_oldsample'
drop _merge

gen hsgrad = educ==2
gen healthyemp = emp if mod==0&sev==0

bysort type_prodwage: sum mod sev age married lw emp healthyemp sp_lw hsgrad anyDI
bysort type_covs: sum mod sev age married lw emp healthyemp sp_lw hsgrad anyDI
bysort type_withed: sum mod sev age married lw emp healthyemp sp_lw hsgrad anyDI
bysort type_withmar: sum mod sev age married lw emp healthyemp sp_lw hsgrad anyDI
bysort type_model: sum mod sev age married lw emp healthyemp sp_lw hsgrad anyDI
bysort type_all: sum mod sev age married lw emp healthyemp sp_lw hsgrad anyDI
bysort type_stationary: sum mod sev age married lw emp healthyemp sp_lw hsgrad anyDI


bysort type_healthspend: sum age y DS year w_with_h educ health
tab DS type_healthspend, col


gen sp_mod = sp_DS==1
gen sp_sev = sp_DS==2

*for type based on self-reported health: check how many people are ALWAYS disabled:
gen healthy = sp_DS == 0 & DS==0
bysort newid: egen sharehealthy = mean(healthy)
bysort newid: egen numobs = count(newid)
*do I have support to estimate transition probabilities by age and self-reported
*health type? appears so:
tab age DS if type_healthdis==1
tab age DS if type_healthdis==2

preserve
collapse (count) year if type_prodwage != ., by(newid)
gen n = 1
sum year, det
local mean=`r(mean)'
local p50=`r(p50)'
collapse (sum) n, by(year)
egen N = sum(n)
replace n = n/N
sum n
local max = `r(max)' 
twoway bar n year, fintensity(0)  || (pci 0 `mean' `max' `mean', lwidth(0.5) lcolor(black) lpattern(solid)) (pci 0 `p50' `max' `p50', lwidth(0.5) lcolor(black) lpattern(dash)), legend(off)  yscale(range(0 `max')) ylabel(0[0.04]`max', labsize(vlarge)) xlabel(0[4]21, labsize(vlarge)) xtick(0[1]21) plotregion(color(white)) graphregion(color(white)) xtitle(Years Observed, size(vlarge)) ytitle(Number Households, size(vlarge))
graph export "../4. Figures/PSID_stats/household_panel_lengths.pdf", replace
restore

preserve
keep if lw != .
collapse (count) year if type_prodwage != ., by(newid)
gen n = 1
sum year, det
local mean=`r(mean)'
local p50=`r(p50)'
collapse (sum) n, by(year)
egen N = sum(n)
replace n = n/N
sum n
local max = `r(max)' 
twoway bar n year, fintensity(0)  || (pci 0 `mean' `max' `mean', lwidth(0.5) lcolor(black) lpattern(solid)) (pci 0 `p50' `max' `p50', lwidth(0.5) lcolor(black) lpattern(dash)), legend(off)  yscale(range(0 `max')) ylabel(0[0.04]`max', labsize(vlarge)) xlabel(0[4]21, labsize(vlarge)) xtick(0[1]21) plotregion(color(white)) graphregion(color(white)) xtitle(Years Observed, size(vlarge)) ytitle(Number Households, size(vlarge))
graph export "../4. Figures/PSID_stats/household_panel_lengths_work.pdf", replace
restore

preserve
collapse (mean) mod sev sp_mod sp_sev health if age >=20 & age <=72, by(type_healthspend age)
twoway (connected  mod age if type_healthspend==1) (connected   mod age if type_healthspend==2), legend(label(1 "Low Cost") label( 2 "High Cost")) ytitle(Moderate Disability)
twoway (connected  sev age if type_healthspend==1) (connected   sev age if type_healthspend==2), legend(label(1 "Low Cost") label( 2 "High Cost")) ytitle(Severe Disability)
restore


preserve
collapse (mean) mod sev sp_mod sp_sev health if age >=20 & age <=72, by(type_healthdis age)
twoway (connected  mod age if type_healthdis==1) (connected   mod age if type_healthdis==2), legend(label(1 "Low Health") label( 2 "High Health")) ytitle(Moderate Disability)
twoway (connected  sev age if type_healthdis==1) (connected   sev age if type_healthdis==2), legend(label(1 "Low Health") label( 2 "High Health")) ytitle(Severe Disability)

restore

preserve
collapse (mean) mod sev sp_mod sp_sev lw health if age >=20 & age <=72, by(type_prodwage age)
twoway (connected  mod age if type_prodwage==1) (connected   mod age if type_prodwage==2) (connected  mod age if type_prodwage==3), legend(label(1 "Low Prod") label( 2 "Med Prod") label( 3 "High Prod")) ytitle(Moderate Disability)
twoway (connected  sev age if type_prodwage==1) (connected   sev age if type_prodwage==2) (connected   sev age if type_prodwage==3), legend(label(1 "Low Prod") label( 2 "Med Prod") label( 3 "High Prod")) ytitle(Severe Disability)
restore

preserve
collapse (mean) mod sev sp_mod sp_sev health if age >=20 & age <=72, by(type_fix age)
twoway (connected  mod age if type_fix==1) (connected   mod age if type_fix==2) (connected  mod age if type_fix==3), legend(label(1 "Low Prod") label( 2 "Med Prod") label( 3 "High Prod")) ytitle(Moderate Disability)
twoway (connected  sev age if type_fix==1) (connected   sev age if type_fix==2) (connected   sev age if type_fix==3), legend(label(1 "Low Prod") label( 2 "Med Prod") label( 3 "High Prod")) ytitle(Severe Disability)
restore

keep newid year type_* lw_res healthyemp_res
duplicates drop newid type_*, force

estpost tabulate $type, 
esttab . using "$out\numtypes$typename.txt", cells("b") noobs nolines nonum nomtitles replace

save ".\intermediary\sp_temp_types", replace
